Data cleaning and auditing

Author

Florencia Grattarola

Published

April 14, 2025

library(httr)
library(jsonlite)
library(countrycode)
library(janitor)
library(readxl)
library(sf)
sf_use_s2(FALSE)
library(tmap)
tmap_mode('view')
library(tidyverse)
options(knitr.kable.NA = '')

Data cleaning

Read data

# Load the working copy of the metadata spreadsheet; a large guess_max makes
# readxl inspect up to 4000 rows before settling on column types.
raw_metadata <- readxl::read_xlsx(
  path = 'data/metada_work_version.xlsx',
  guess_max = 4000
)

Check columns

# Standardise the column names, then drop rows and columns that are
# completely empty.
raw_metadata <- janitor::remove_empty(
  janitor::clean_names(raw_metadata),
  c('rows', 'cols')
)

Check source fields

The fields are: name_orig, format, and language.

  • Make sure there are no \r, \n, other weird characters or typos.
  • Capitalise and clean language.
# name
# Sources whose name_orig contains a URL, with the number of rows for each.
raw_metadata %>%
  filter(str_detect(name_orig, 'http')) %>%
  group_by(name_orig) %>%
  count()

# Number of distinct countries per source name, after collapsing whitespace
# and stripping backslashes and double quotes from the name.
raw_metadata %>%
  mutate(
    name_orig = str_squish(name_orig),
    name_orig = str_remove_all(name_orig, "\\\\"),
    name_orig = str_remove_all(name_orig, "\"")
  ) %>%
  group_by(name_orig) %>%
  summarise(n_countries = n_distinct(country)) %>%
  select(name_orig, n_countries) %>%
  print(n = 10)

# format
# Distinct format values, with the literal string 'NA' turned into a real NA
# and whitespace collapsed.
raw_metadata %>%
  transmute(format = ifelse(format == 'NA', NA, str_squish(format))) %>%
  distinct()

# language
# Distinct language values after normalisation:
#   * the literal string 'NA' becomes a real NA and whitespace is collapsed;
#   * EVERY '/' or '|' separator is rewritten as ' | ' (str_replace() would
#     only touch the first one, leaving e.g. 'A | B/C');
#   * a final str_squish() removes the double spaces produced when a
#     separator was already surrounded by spaces.
raw_metadata %>%
  mutate(language = ifelse(language == 'NA', NA, str_squish(language))) %>%
  mutate(language = str_squish(str_replace_all(language, "/|\\|", ' | '))) %>%
  distinct(language) %>%
  print(n = 50)

DOUBTS

The name_orig is: https://www.odonat-grandest.fr/listes-rouges-grand-est-etat-avancement/.
The id values are: [1] 446 447 448 449 450 451 452 453 454 455 456 457.

FIX

# French: Liste rouge des Amphibiens du Grand Est
# English: Red list of Mammals of Grand Est

# NOTE(review): none of the pipelines in this section assigns its result, so
# `raw_metadata` itself is left untouched -- confirm whether these fixes are
# meant to be persisted (raw_metadata <- raw_metadata %>% ...).

# Grand Est records whose name_orig is a URL get a readable name built from
# the taxon group; only the Grand Est rows are shown.
raw_metadata %>%
  mutate(name_orig = ifelse(grepl('htt', name_orig) &
                              state_province == 'Grand Est',
                            str_glue('Red list of {group} of Grand Est'),
                            name_orig)) %>%
  filter(state_province == 'Grand Est') %>%
  select(group, name_orig)

# language: 'NA' strings become real NAs and every '/' or '|' separator is
# rewritten as ' | '. str_replace_all() is required here: str_replace() only
# rewrites the first separator in each value. The outer str_squish() removes
# the doubled spaces created when a separator was already padded.
raw_metadata %>%
  mutate(language = ifelse(language == 'NA', NA, str_squish(language))) %>%
  mutate(language = str_squish(str_replace_all(language, "/|\\|", ' | ')))

# format: a '?' placeholder is treated as missing.
raw_metadata %>%
  mutate(format = ifelse(format == '?', NA, format))

# name_orig: collapse whitespace, then strip backslashes and double quotes.
raw_metadata %>%
  mutate(
    name_orig = str_squish(name_orig),
    name_orig = str_remove_all(name_orig, "\\\\"),
    name_orig = str_remove_all(name_orig, "\"")
  )

Check Location fields

The fields are: continent, country, state_province, gadm_level_1, gadm_level_2, region_custom, region_detail, and iso_2.

  • Make sure there are no typos, which would create spurious duplicates.
  • Capitalise continent, country, state_province names.
  • Check ISO codes.
  • Check GADM levels.
  • Clean region_custom and region_detail.
# check continent
# Distinct continent values after padding '|' separators with spaces,
# replacing underscores with spaces and converting to title case.
raw_metadata %>%
  mutate(
    continent = str_squish(continent),
    continent = str_replace_all(continent, '\\|', ' | '),
    continent = str_squish(continent),
    continent = str_replace_all(continent, '_', ' '),
    continent = str_to_title(continent)
  ) %>%
  distinct(continent)

# check country
# Distinct country names after normalisation: 'NA' strings become real NAs,
# underscores become spaces, and everything except 'USSR' is title-cased.
# The connecting words And/Of/The are then lower-cased again. The patterns
# are anchored with word boundaries so that only whole words are affected;
# an unanchored 'Of' or 'The' would also lower-case the start of longer
# words (e.g. 'Thessaly' -> 'thessaly').
raw_metadata %>%
  mutate(country = ifelse(country == 'NA', NA, str_squish(country))) %>%
  mutate(country = str_replace_all(country, '_', ' ')) %>%
  mutate(country = ifelse(country == 'USSR', country, str_to_title(country))) %>%
  mutate(country = str_replace_all(country, c('\\bAnd ' = 'and ',
                                              '\\bOf\\b' = 'of',
                                              '\\bThe\\b' = 'the'))) %>%
  distinct(country)

# check state_province
# Lists the records whose normalised state_province differs from
# gadm_level_1. state_province is squished ('NA' strings become real NAs),
# title-cased, and the connecting words And/Of/The are lower-cased. The
# patterns use word boundaries so that only whole words are changed; an
# unanchored 'Of' or 'The' would also hit the start of longer names
# (e.g. 'Offaly' -> 'offaly').
raw_metadata %>%
  mutate(state_province = ifelse(state_province == 'NA', NA,
                                 str_squish(state_province))) %>%
  mutate(state_province = str_to_title(state_province)) %>%
  mutate(state_province = str_replace_all(state_province,
                                          c('\\bAnd ' = 'and ',
                                            '\\bOf\\b' = 'of',
                                            '\\bThe\\b' = 'the'))) %>%
  filter(!is.na(state_province)) %>%
  filter(state_province != gadm_level_1) %>%
  distinct(country, state_province, gadm_level_1, iso_2, iso_3) %>%
  print(n = 100)

# check gadm_level_1 and gadm_level_2
# Sorted list of distinct gadm_level_1 values after normalisation ('NA'
# strings -> NA, whitespace collapsed, title case, connecting words
# And/Of/The lower-cased). Word boundaries keep the lower-casing from
# touching longer words that merely start with 'Of' or 'The'.
raw_metadata %>%
  mutate(gadm_level_1 = ifelse(gadm_level_1 == 'NA', NA,
                               str_squish(gadm_level_1))) %>%
  mutate(gadm_level_1 = str_to_title(gadm_level_1)) %>%
  mutate(gadm_level_1 = str_replace_all(gadm_level_1,
                                        c('\\bAnd ' = 'and ',
                                          '\\bOf\\b' = 'of',
                                          '\\bThe\\b' = 'the'))) %>%
  filter(!is.na(gadm_level_1)) %>%
  distinct(gadm_level_1) %>%
  arrange(gadm_level_1) %>%
  print(n = 100)

# Non-missing gadm_level_2 values, squished and title-cased.
raw_metadata %>%
  mutate(gadm_level_2 = ifelse(gadm_level_2 == 'NA', NA,
                               str_squish(gadm_level_2))) %>%
  mutate(gadm_level_2 = str_to_title(gadm_level_2)) %>%
  filter(!is.na(gadm_level_2)) %>%
  select(gadm_level_2)

# check region_custom and region_detail
# Distinct custom regions (with their detail and ISO codes) for the records
# that have a region_custom; 'NA' strings become real NAs and '|' separators
# in region_detail are padded with spaces.
raw_metadata %>%
  mutate(
    region_custom = ifelse(region_custom == 'NA', NA, str_squish(region_custom)),
    region_detail = ifelse(region_detail == 'NA', NA, str_squish(region_detail)),
    region_detail = str_squish(region_detail),
    region_detail = str_replace_all(region_detail, '\\|', ' | '),
    region_detail = str_squish(region_detail)
  ) %>%
  filter(!is.na(region_custom)) %>%
  distinct(region_custom, region_detail, iso_2, iso_3) %>%
  print(n = 100)

# check iso_2 and iso_3
# Records left without an iso_2 code after normalisation. The string 'NA' is
# kept as a code when the country is Namibia; otherwise it becomes a real NA.
# '|' separators in both ISO fields are padded with spaces.
raw_metadata %>%
  janitor::clean_names() %>%
  janitor::remove_empty(c('rows', 'cols')) %>%
  mutate(
    iso_2 = ifelse(iso_2 == 'NA' & country != 'Namibia', NA, str_squish(iso_2)),
    iso_2 = str_squish(str_replace_all(str_squish(iso_2), '\\|', ' | ')),
    iso_3 = ifelse(iso_3 == 'NA', NA, str_squish(iso_3)),
    iso_3 = str_squish(str_replace_all(str_squish(iso_3), '\\|', ' | '))
  ) %>%
  filter(is.na(iso_2)) %>%
  select(country, iso_2, iso_3, region_custom, region_detail) #%>% distinct()

DOUBTS

Check Taxon fields

The fields are: kingdom, phylum, subphylum, class, order, and group.

  • Check duplicates in all fields.
  • Rename group to taxa.

I matched the names against the GBIF backbone taxonomy with my own custom function, nameMatcherGBIF().

# gbif name parser
#' Match taxon names against the GBIF backbone taxonomy.
#'
#' Sends one request per name to the GBIF `species/match` endpoint
#' (`strict=true`, `verbose=false`) and collects the answers in a tibble.
#'
#' @param sp_name_list Character vector of names to look up.
#' @return A tibble with one row per element of `sp_name_list` and the
#'   character columns `sp_name`, `scientificName`, `kingdom`, `phylum`,
#'   `class`, `order`, `family`, `genus`, `specificEpithet`, `species`,
#'   `status` and `rank`. Every column except `sp_name` is `NA` when GBIF
#'   reports no match (`matchType == 'NONE'`), when a field is absent from the
#'   response, when the name itself is `NA`, or when the request fails (a
#'   warning is raised in that last case).
nameMatcherGBIF <- function(sp_name_list) {

  # api <- 'http://api.gbif.org/v1/parser/name'
  api <- 'http://api.gbif.org/v1/species/match'

  # Fields copied from the GBIF response, in output column order.
  fields <- c('scientificName', 'kingdom', 'phylum', 'class', 'order',
              'family', 'genus', 'specificEpithet', 'species', 'status',
              'rank')

  # Zero-row template: fixes column order/types and is what an empty input
  # returns.
  template <- tibble(sp_name = character(),
                     scientificName = character(),
                     kingdom = character(),
                     phylum = character(),
                     class = character(),
                     order = character(),
                     family = character(),
                     genus = character(),
                     specificEpithet = character(),
                     species = character(),
                     status = character(),
                     rank = character())

  # Build the one-row result for a name from a parsed response
  # (NULL means "nothing usable came back").
  as_row <- function(sp_name, parsed) {
    values <- lapply(fields, function(field) {
      value <- parsed[[field]]
      if (is.null(value)) NA_character_ else as.character(value)[1]
    })
    as_tibble(c(list(sp_name = as.character(sp_name)),
                setNames(values, fields)))
  }

  # Look up a single name.
  match_one <- function(sp_name) {
    # Do not query GBIF for a missing name (it would be sent as "NA").
    if (is.na(sp_name)) {
      return(as_row(sp_name, NULL))
    }

    # Let httr build the query string so that reserved characters in the name
    # (e.g. '&' in "Proboscidea & Sirenia") are escaped; URLencode() on the
    # whole URL leaves them as-is and truncates the `name` parameter.
    response <- GET(url = api,
                    query = list(name = sp_name,
                                 strict = 'true',
                                 verbose = 'false'))
    parsed <- if (http_error(response)) {
      NULL
    } else {
      response %>%
        content(as = 'text', encoding = 'UTF-8') %>%
        fromJSON(flatten = TRUE)
    }

    if (is.null(parsed$matchType)) {
      warning('GBIF request failed for "', sp_name, '"; returning NA.',
              call. = FALSE)
      return(as_row(sp_name, NULL))
    }
    if (parsed$matchType == 'NONE') {
      return(as_row(sp_name, NULL))
    }
    as_row(sp_name, parsed)
  }

  # Collect all rows first and bind once instead of growing a tibble with
  # rbind() inside a loop.
  rows <- lapply(unname(sp_name_list), match_one)
  bind_rows(template, rows)
}

# Names to look up in GBIF: the `group` of every distinct taxonomic
# combination, trimmed and title-cased, with the connecting words And/Of/The
# lower-cased again. Word boundaries restrict the lower-casing to whole
# words; an unanchored 'The' would also turn a taxon name such as
# 'Thecostraca' into 'thecostraca' before it is sent to GBIF.
sp_list <- raw_metadata %>%
  distinct(kingdom, phylum, subphylum, class, order, group) %>%
  mutate(group = str_to_title(str_trim(group))) %>%
  mutate(group = str_replace_all(group, c('\\bAnd ' = 'and ',
                                          '\\bOf\\b' = 'of',
                                          '\\bThe\\b' = 'the'))) %>%
  pull(group)

# Query GBIF for every name in sp_list (messages from the lookup are muted).
sp_list_matched <- suppressMessages(nameMatcherGBIF(sp_list))

# Manual overrides applied identically to scientificName and kingdom:
#   * names containing 'flora' -> 'Plantae'
#   * names containing 'fauna' -> 'Animalia'
#   * names containing 'tunicata' -> NA (treated as unmatched)
# Anything else keeps the value returned by GBIF. The conditions are checked
# in this order, so a name containing both 'flora' and 'fauna' is 'Plantae'.
# TRUE is spelled out (T can be reassigned) and the missing value is typed
# so case_when() always combines character vectors.
sp_list_matched <- sp_list_matched %>%
  mutate(across(
    c(scientificName, kingdom),
    function(current) {
      case_when(grepl('flora', sp_name, ignore.case = TRUE) ~ 'Plantae',
                grepl('fauna', sp_name, ignore.case = TRUE) ~ 'Animalia',
                grepl('tunicata', sp_name, ignore.case = TRUE) ~ NA_character_,
                .default = current)
    }
  )) %>%
  # Harmonise the capitalisation of the phylum.
  mutate(phylum = ifelse(phylum == 'chordata', 'Chordata', phylum))

# Names that ended up without a scientificName, i.e. unmatched.
sp_list_unmatched <- sp_list_matched %>%
  filter(is.na(scientificName)) %>%
  pull(sp_name)

# Number of looked-up names that ended up with a kingdom.
nrow(filter(sp_list_matched, !is.na(kingdom))) # matched
[1] 205
# Number of looked-up names left without a scientificName.
length(sp_list_unmatched) # not matched
[1] 358

When the taxon name (i.e., group) was not found I kept the previous taxonomic fields’ values.

# One row per distinct (trimmed) group in the raw metadata, with the GBIF
# taxonomy attached where the group name was matched. The join key is stated
# explicitly instead of relying on the implicit natural join (the left table
# only has the `group` column, so this is the same key).
# NOTE(review): sp_name was title-cased before the GBIF lookup, whereas
# `group` is only trimmed here, so a group joins only if it is already
# written in that form -- confirm this is intended.
merged_list <- left_join(
  raw_metadata %>%
    mutate(group = str_trim(group)) %>%
    distinct(group) %>%
    arrange(group),
  sp_list_matched %>%
    filter(!is.na(scientificName)) %>%
    rename(group = sp_name) %>%
    distinct(),
  by = 'group'
) %>%
  arrange(group)

# Original taxonomic fields of the first record found for each (trimmed)
# group, sorted by group.
raw_metadata_taxon_list <- raw_metadata %>%
  mutate(group = str_trim(group)) %>%
  distinct(group, .keep_all = TRUE) %>%
  arrange(group) %>%
  select(kingdom, phylum, subphylum, class, order, group)

# Final taxonomy table: every record gets the GBIF taxonomy when its group
# was matched and the original taxonomic fields otherwise; the distinct
# group/taxonomy combinations are then rendered with kableExtra.
left_join(
  # Records without their original taxonomic columns.
  raw_metadata %>%
    mutate(group = str_trim(group)) %>%
    select(-c(kingdom, phylum, subphylum, class, order)),
  # Taxonomy per group: GBIF values for matched groups, original values for
  # the unmatched ones.
  bind_rows(
    merged_list %>%
      filter(!is.na(scientificName)),
    merged_list %>%
      filter(is.na(scientificName)) %>%
      select(group) %>%
      left_join(raw_metadata_taxon_list, by = 'group')
  )
  # NOTE(review): this outer join still uses every shared column as key;
  # presumably that is only `group` -- confirm and add `by = 'group'`.
) %>%
  mutate(group = str_to_title(str_trim(group))) %>%
  # Lower-case the connecting words again. Word boundaries keep names that
  # merely start with 'Of'/'The' intact, and the dot in 'Et Al.' is escaped
  # so it only matches a literal full stop.
  mutate(group = str_replace_all(group, c('\\bAnd ' = 'and ',
                                          '\\bOf\\b' = 'of',
                                          '\\bThe\\b' = 'the',
                                          '\\bEt Al\\.' = 'et al.'))) %>%
  distinct(group, kingdom, phylum, class, order, family, rank) %>%
  arrange(kingdom, phylum, class, order) %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
group kingdom phylum class order family rank
Earthworms Animalia Anelida Clitellata Opisthopora
Oligochaeta Animalia Anelida Clitellata
Onychophora Animalia Animalia
Branchiobdellida Animalia Annelida Clitellata Branchiobdellida ORDER
Hirudinea Animalia Annelida Clitellata
Worms Animalia Annelida Clitellata
Leech Animalia Annelida Clitellata
Leeches Animalia Annelida Clitellata
Echiurida Animalia Annelida Echiura Echiuroidea
Sipunculids Animalia Annelida Sipuncula
Amblypygi Animalia Arthropoda Arachnida Amblypygi ORDER
Spiders Animalia Arthropoda Arachnida Araneae
Uropodina Animalia Arthropoda Arachnida Mesostigmata
Opiliones Animalia Arthropoda Arachnida Opiliones ORDER
Opilioness Animalia Arthropoda Arachnida Opiliones
Pseudoscorpiones Animalia Arthropoda Arachnida Pseudoscorpiones ORDER
False Scorpions Animalia Arthropoda Arachnida Pseudoscorpions
Scorpions Animalia Arthropoda Arachnida Scorpionida
Arachnida Animalia Arthropoda Arachnida CLASS
Anostraca Animalia Arthropoda Branchiopoda Anostraca ORDER
Branchiopoda Animalia Arthropoda Branchiopoda CLASS
Centipedes Animalia Arthropoda Chilopoda
Chilopoda Animalia Arthropoda Chilopoda CLASS
Collembola Animalia Arthropoda Collembola CLASS
Entomostraca Animalia Arthropoda Copepoda
Copepoda Animalia Arthropoda Copepoda CLASS
Millipedes Animalia Arthropoda Diplopoda
Diplopoda Animalia Arthropoda Diplopoda CLASS
Archaeognatha Animalia Arthropoda Insecta Archaeognatha ORDER
Blattodea Animalia Arthropoda Insecta Blattodea ORDER
Wood Cockroaches Animalia Arthropoda Insecta Blattodea
Cockroaches Animalia Arthropoda Insecta Blattodea
Hydraenidae Animalia Arthropoda Insecta Coleoptera Hydraenidae FAMILY
Coleoptera Animalia Arthropoda Insecta Coleoptera ORDER
Carabidae Animalia Arthropoda Insecta Coleoptera Carabidae FAMILY
Saproxylic Beetles Animalia Arthropoda Insecta Coleoptera
Beetles Animalia Arthropoda Insecta Coleoptera
Longhorn and Scarab Beetles Animalia Arthropoda Insecta Coleoptera
Longhorn Beetles Animalia Arthropoda Insecta Coleoptera
Scarabaeidae Animalia Arthropoda Insecta Coleoptera Scarabaeidae FAMILY
Ladybugs Animalia Arthropoda Insecta Coleoptera
Water Beetles Animalia Arthropoda Insecta Coleoptera
Tenebrionidae Animalia Arthropoda Insecta Coleoptera Tenebrionidae FAMILY
Soldier Beetles Animalia Arthropoda Insecta Coleoptera
Leaf Beetles Animalia Arthropoda Insecta Coleoptera
Histeridae Animalia Arthropoda Insecta Coleoptera Histeridae FAMILY
Sphaeritidae Animalia Arthropoda Insecta Coleoptera Sphaeritidae FAMILY
Derodontidoidea Animalia Arthropoda Insecta Coleoptera
Bostrichoidea Animalia Arthropoda Insecta Coleoptera
Staphylinidae Animalia Arthropoda Insecta Coleoptera Staphylinidae FAMILY
Lucanidae Animalia Arthropoda Insecta Coleoptera Lucanidae FAMILY
Geotrupidae Animalia Arthropoda Insecta Coleoptera Geotrupidae FAMILY
Trogidae Animalia Arthropoda Insecta Coleoptera Trogidae FAMILY
Silphidae Animalia Arthropoda Insecta Coleoptera Silphidae FAMILY
Chrysomelidae Animalia Arthropoda Insecta Coleoptera Chrysomelidae FAMILY
Bark Beetles Animalia Arthropoda Insecta Coleoptera
Ground Beetles Animalia Arthropoda Insecta Coleoptera
Curculionidae Animalia Arthropoda Insecta Coleoptera Curculionidae FAMILY
Powderpost Beetles Animalia Arthropoda Insecta Coleoptera
Bostrichidae Animalia Arthropoda Insecta Coleoptera Bostrichidae FAMILY
Anobiidae Animalia Arthropoda Insecta Coleoptera Anobiidae FAMILY
Ptinidae Animalia Arthropoda Insecta Coleoptera Ptinidae FAMILY
Deadwood Beetle Animalia Arthropoda Insecta Coleoptera
Buprestidae Animalia Arthropoda Insecta Coleoptera Buprestidae FAMILY
Snout Beetles Animalia Arthropoda Insecta Coleoptera
Staphylinoidea Animalia Arthropoda Insecta Coleoptera
Cucujoidea Animalia Arthropoda Insecta Coleoptera
Lamellicornia Animalia Arthropoda Insecta Coleoptera
Seed Beetles Animalia Arthropoda Insecta Coleoptera
Weevils Animalia Arthropoda Insecta Coleoptera
Anthribidae Animalia Arthropoda Insecta Coleoptera Anthribidae FAMILY
Platypodidae Animalia Arthropoda Insecta Coleoptera Curculionidae FAMILY
Ground Beetle Animalia Arthropoda Insecta Coleoptera
Tiger Beetles Animalia Arthropoda Insecta Coleoptera
Megalopodidae Animalia Arthropoda Insecta Coleoptera Megalopodidae FAMILY
Scarabaeoidea Animalia Arthropoda Insecta Coleoptera
Cerambycidae Animalia Arthropoda Insecta Coleoptera Cerambycidae FAMILY
Curculionoidea Animalia Arthropoda Insecta Coleoptera
Cleroidea Animalia Arthropoda Insecta Coleoptera
Elateridae Animalia Arthropoda Insecta Coleoptera Elateridae FAMILY
Lymexyloidea Animalia Arthropoda Insecta Coleoptera
Cicindelidae Animalia Arthropoda Insecta Coleoptera Carabidae FAMILY
Lucanoidea Animalia Arthropoda Insecta Coleoptera
Hydrophilidae Animalia Arthropoda Insecta Coleoptera Hydrophilidae FAMILY
Platypsyllinae Animalia Arthropoda Insecta Coleoptera
Cholevinae Animalia Arthropoda Insecta Coleoptera
Malachiidae Animalia Arthropoda Insecta Coleoptera Malachiidae FAMILY
Melyridae Animalia Arthropoda Insecta Coleoptera Melyridae FAMILY
Phloeophilidae Animalia Arthropoda Insecta Coleoptera Phloiophilidae FAMILY
Cleridae Animalia Arthropoda Insecta Coleoptera Cleridae FAMILY
Cerophytidae Animalia Arthropoda Insecta Coleoptera Cerophytidae FAMILY
Eucnemidae Animalia Arthropoda Insecta Coleoptera Eucnemidae FAMILY
Cryptophagidae Animalia Arthropoda Insecta Coleoptera Cryptophagidae FAMILY
Latridiidae Animalia Arthropoda Insecta Coleoptera Latridiidae FAMILY
Mycetophagidae Animalia Arthropoda Insecta Coleoptera Mycetophagidae FAMILY
Zopheridae Animalia Arthropoda Insecta Coleoptera Zopheridae FAMILY
Monotomidae Animalia Arthropoda Insecta Coleoptera Monotomidae FAMILY
Phalacridae Animalia Arthropoda Insecta Coleoptera Phalacridae FAMILY
Pyrochroide Animalia Arthropoda Insecta Coleoptera
Meloidae Animalia Arthropoda Insecta Coleoptera
Orsodacnidae Animalia Arthropoda Insecta Coleoptera Orsodacnidae FAMILY
Donaciinae Animalia Arthropoda Insecta Coleoptera
Leptinidae Animalia Arthropoda Insecta Coleoptera Leiodidae FAMILY
Lissomidae Animalia Arthropoda Insecta Coleoptera Elateridae FAMILY
Derodontoidae Animalia Arthropoda Insecta Coleoptera
Leiodidae Animalia Arthropoda Insecta Coleoptera Leiodidae FAMILY
Oedemeridae Animalia Arthropoda Insecta Coleoptera Oedemeridae FAMILY
Melandryidae Animalia Arthropoda Insecta Coleoptera Melandryidae FAMILY
Dung Beetles Animalia Arthropoda Insecta Coleoptera
Earwigs Animalia Arthropoda Insecta Dermaptera
Heteroptera Animalia Arthropoda Insecta Diptera Sphaeroceridae GENUS
Acalyptratae Animalia Arthropoda Insecta Diptera
Calypterate Animalia Arthropoda Insecta Diptera
Larger Brachycera Animalia Arthropoda Insecta Diptera
Dolichopodid Animalia Arthropoda Insecta Diptera
Lonchopteridae Animalia Arthropoda Insecta Diptera Lonchopteridae FAMILY
Platypezidae Animalia Arthropoda Insecta Diptera Platypezidae FAMILY
Opetiidae Animalia Arthropoda Insecta Diptera Opetiidae FAMILY
Hoverflies Animalia Arthropoda Insecta Diptera
Chaoboridae Animalia Arthropoda Insecta Diptera Chaoboridae FAMILY
Thaumaleidae Animalia Arthropoda Insecta Diptera Thaumaleidae FAMILY
Ceratopogonidae Animalia Arthropoda Insecta Diptera Ceratopogonidae FAMILY
Flies Animalia Arthropoda Insecta Diptera
Asilidae Animalia Arthropoda Insecta Diptera Asilidae FAMILY
Psychodidae Animalia Arthropoda Insecta Diptera Psychodidae FAMILY
Dixidae Animalia Arthropoda Insecta Diptera Dixidae FAMILY
Soldier Flies Animalia Arthropoda Insecta Diptera
Horse-Flies Animalia Arthropoda Insecta Diptera
Bee Flies Animalia Arthropoda Insecta Diptera
Empididae Animalia Arthropoda Insecta Diptera Empididae FAMILY
Conopidae Animalia Arthropoda Insecta Diptera Conopidae FAMILY
Chironomidae Animalia Arthropoda Insecta Diptera Chironomidae FAMILY
Dolichopodidae Animalia Arthropoda Insecta Diptera Dolichopodidae FAMILY
Black Flies Animalia Arthropoda Insecta Diptera
Long-Legged Flies Animalia Arthropoda Insecta Diptera
Micropezidae Animalia Arthropoda Insecta Diptera Micropezidae FAMILY
Grass Flies Animalia Arthropoda Insecta Diptera
Tachinidae Animalia Arthropoda Insecta Diptera Tachinidae FAMILY
Aquatic Empididae Animalia Arthropoda Insecta Diptera
Pediciidae Animalia Arthropoda Insecta Diptera Pediciidae FAMILY
Limoniidae Animalia Arthropoda Insecta Diptera Limoniidae FAMILY
Diptera Animalia Arthropoda Insecta Diptera
Parasitic Diptera Animalia Arthropoda Insecta Diptera
Mayflies Animalia Arthropoda Insecta Ephemeroptera
Auchenorrhyncha Animalia Arthropoda Insecta Hemiptera
Cicadas Animalia Arthropoda Insecta Hemiptera
Shieldbugs Animalia Arthropoda Insecta Hemiptera
Clavicornia Animalia Arthropoda Insecta Hemiptera Aradidae GENUS
Big-Eyed Bugs Animalia Arthropoda Insecta Hemiptera
Nepomorpha Animalia Arthropoda Insecta Hemiptera
Fulgoromorpha Animalia Arthropoda Insecta Hemiptera
Cicadomorpha Animalia Arthropoda Insecta Hemiptera Palaeontinidae GENUS
Scale Insect Animalia Arthropoda Insecta Hemiptera
Hemiptera Animalia Arthropoda Insecta Hemiptera ORDER
Ants Animalia Arthropoda Insecta Hymenoptera
Bombus Spp. Animalia Arthropoda Insecta Hymenoptera
Hymenoptera Animalia Arthropoda Insecta Hymenoptera ORDER
Bees Animalia Arthropoda Insecta Hymenoptera
Diversicornia Animalia Arthropoda Insecta Hymenoptera Encyrtidae GENUS
Sawflies Animalia Arthropoda Insecta Hymenoptera
Spheciformes Animalia Arthropoda Insecta Hymenoptera
Pompilidae Animalia Arthropoda Insecta Hymenoptera Pompilidae FAMILY
Chrysididae Animalia Arthropoda Insecta Hymenoptera Chrysididae FAMILY
Scolioidea Animalia Arthropoda Insecta Hymenoptera
Cuckoo Wasp Animalia Arthropoda Insecta Hymenoptera
Wasps Animalia Arthropoda Insecta Hymenoptera
Sphecidae Animalia Arthropoda Insecta Hymenoptera Sphecidae FAMILY
Wild Bees Animalia Arthropoda Insecta Hymenoptera
Scoliidae Animalia Arthropoda Insecta Hymenoptera Scoliidae FAMILY
Crabronidae et al. Animalia Arthropoda Insecta Hymenoptera
Chrysididae et al. Animalia Arthropoda Insecta Hymenoptera
Symphyta Animalia Arthropoda Insecta Hymenoptera
Stinging Wasps Animalia Arthropoda Insecta Hymenoptera
Mutillidae Animalia Arthropoda Insecta Hymenoptera Mutillidae FAMILY
Sapygidae Animalia Arthropoda Insecta Hymenoptera Sapygidae FAMILY
Tiphiidae Animalia Arthropoda Insecta Hymenoptera Tiphiidae FAMILY
Cimbicidae Animalia Arthropoda Insecta Hymenoptera Cimbicidae FAMILY
Siricidae Animalia Arthropoda Insecta Hymenoptera Siricidae FAMILY
Xiphydriidae Animalia Arthropoda Insecta Hymenoptera Xiphydriidae FAMILY
Ampulicidae Animalia Arthropoda Insecta Hymenoptera Ampulicidae FAMILY
Crabronidae Animalia Arthropoda Insecta Hymenoptera Crabronidae FAMILY
Apoidea Animalia Arthropoda Insecta Hymenoptera
Lepidoptera Animalia Arthropoda Insecta Lepidoptera ORDER
Moths Animalia Arthropoda Insecta Lepidoptera
Butterflies Animalia Arthropoda Insecta Lepidoptera
Papilionoidea Animalia Arthropoda Insecta Lepidoptera
Hesperioidea Animalia Arthropoda Insecta Lepidoptera
Noctuidae Animalia Arthropoda Insecta Lepidoptera Noctuidae FAMILY
Night Butterflies Animalia Arthropoda Insecta Lepidoptera
Owlet Moths Animalia Arthropoda Insecta Lepidoptera
Lymantriinae Animalia Arthropoda Insecta Lepidoptera
Short-Cloaked Moth Animalia Arthropoda Insecta Lepidoptera
Geometer Moths Animalia Arthropoda Insecta Lepidoptera
Bombyces Animalia Arthropoda Insecta Lepidoptera
Sphinges S.l. Animalia Arthropoda Insecta Lepidoptera
Pyralidae Animalia Arthropoda Insecta Lepidoptera Pyralidae FAMILY
Sphinges Animalia Arthropoda Insecta Lepidoptera
Geometridae Animalia Arthropoda Insecta Lepidoptera Geometridae FAMILY
Makrolepidoptera Animalia Arthropoda Insecta Lepidoptera
Macrolepidoptera Animalia Arthropoda Insecta Lepidoptera
Microlepidoptera Animalia Arthropoda Insecta Lepidoptera
Zygaenidae Animalia Arthropoda Insecta Lepidoptera Zygaenidae FAMILY
Sphingidae Animalia Arthropoda Insecta Lepidoptera Sphingidae FAMILY
Sesiidae Animalia Arthropoda Insecta Lepidoptera Sesiidae FAMILY
Psychidae Animalia Arthropoda Insecta Lepidoptera Psychidae FAMILY
Pterophoridae Animalia Arthropoda Insecta Lepidoptera Pterophoridae FAMILY
Alucitidae Animalia Arthropoda Insecta Lepidoptera Alucitidae FAMILY
Crambidae Animalia Arthropoda Insecta Lepidoptera Crambidae FAMILY
Torticidae Animalia Arthropoda Insecta Lepidoptera
Choreutidae Animalia Arthropoda Insecta Lepidoptera Choreutidae FAMILY
Hawk Moths Animalia Arthropoda Insecta Lepidoptera
Bombycidae Animalia Arthropoda Insecta Lepidoptera Bombycidae FAMILY
Pantheidae Animalia Arthropoda Insecta Lepidoptera Noctuidae FAMILY
Nolidae Animalia Arthropoda Insecta Lepidoptera Nolidae FAMILY
Sessidae Animalia Arthropoda Insecta Lepidoptera
Erebidae Animalia Arthropoda Insecta Lepidoptera Erebidae FAMILY
Mantodea Animalia Arthropoda Insecta Mantodea ORDER
Mecoptera Animalia Arthropoda Insecta Mecoptera ORDER
Scorpionflies Animalia Arthropoda Insecta Mecoptera
Megaloptera Animalia Arthropoda Insecta Megaloptera
Neuroptera Animalia Arthropoda Insecta Neuroptera ORDER
Owlflies Animalia Arthropoda Insecta Neuroptera
Net-Winged Insects Animalia Arthropoda Insecta Neuroptera
Odonata Animalia Arthropoda Insecta Odonata ORDER
Orthoptera Animalia Arthropoda Insecta Orthoptera ORDER
Grasshoppers Animalia Arthropoda Insecta Orthoptera
Ensifera Animalia Arthropoda Insecta Orthoptera
Caelifera Animalia Arthropoda Insecta Orthoptera
Crickets Animalia Arthropoda Insecta Orthoptera
Katydids Animalia Arthropoda Insecta Orthoptera
Stick Insects Animalia Arthropoda Insecta Phasmatodea
Stoneflies Animalia Arthropoda Insecta Plecoptera
Snakeflies Animalia Arthropoda Insecta Raphidioptera
Thrips Animalia Arthropoda Insecta Thysanoptera Thripidae GENUS
Trichoptera Animalia Arthropoda Insecta Trichoptera ORDER
Caddisflies Animalia Arthropoda Insecta Trichoptera
Plecoptera Animalia Arthropoda Insecta CLASS
Insects Animalia Arthropoda Insecta
Aquatic and Semi-Aquatic Bugs Animalia Arthropoda Insecta
Insecta Animalia Arthropoda Insecta CLASS
Water Bugs Animalia Arthropoda Insecta
Woodlice Animalia Arthropoda Isopoda
Amphipoda Animalia Arthropoda Malacostraca Amphipoda ORDER
Niphargidae Animalia Arthropoda Malacostraca Amphipoda Niphargidae FAMILY
Cumacea Animalia Arthropoda Malacostraca Cumacea ORDER
Decapoda Animalia Arthropoda Malacostraca Decapoda ORDER
Crayfishes Animalia Arthropoda Malacostraca Decapoda
Astacoidea Animalia Arthropoda Malacostraca Decapoda
Freshwater Decapod Crustaceans Animalia Arthropoda Malacostraca Decapoda
Marine Decapod Crustaceans Animalia Arthropoda Malacostraca Decapoda
Marine Isopoda Animalia Arthropoda Malacostraca Isopoda
Malacostraca Animalia Arthropoda Malacostraca CLASS
Mysidacea Animalia Arthropoda Malacostraca CLASS
Barnacles Animalia Arthropoda Thecostraca
Freshwater Crabs Animalia Arthropoda Decapoda
Horseshoe Crabs Animalia Arthropoda Xiphosura
Crustaceans Animalia Arthropoda
Arthropods Animalia Arthropoda
Freshwater Crustaceans Animalia Arthropoda
Crayfish Animalia Arthropoda
Sea Spiders Animalia Arthropoda
Marine Crustaceans Animalia Arthropoda
Miscellaneous Arthropods Animalia Arthropoda
Myriapods Animalia Arthropoda
Brachiopods Animalia Brachiopoda
Marine Bryozoans Animalia Bryoza
Bryozoa Animalia Bryozoa PHYLUM
Bony Fishes Animalia Chordata Actinopterygii
Frogs Animalia Chordata Amphibia Anura
Amphibians Animalia Chordata Amphibia
Sea Squirts Animalia Chordata Ascidiacea
Anatidae Animalia Chordata Aves Anseriformes Anatidae FAMILY
Galliformes Animalia Chordata Aves Galliformes ORDER
Birds Animalia Chordata Aves
Breeding Birds Animalia Chordata Aves
Wintering Birds Animalia Chordata Aves
Transient Birds Animalia Chordata Aves
Birds Terre Adelie Animalia Chordata Aves
Birds Terres Australes Animalia Chordata Aves
Birds Scattered Islands Animalia Chordata Aves
Migratory Birds Animalia Chordata Aves
Metropolitan Birds Animalia Chordata Aves
Endemic Brids Animalia Chordata Aves
Breeding Birds of Prey Animalia Chordata Aves
Endangered Birds Animalia Chordata Aves
Birds Ecuador Animalia Chordata Aves
Birds Galapagos Animalia Chordata Aves
Birds of Prey Animalia Chordata Aves
Breeding Raptors Animalia Chordata Aves
Freshwater Lamprey Animalia Chordata Cephalaspidomorphi
Sharks Animalia Chordata Chondrichthyes
Chondrichthyes Animalia Chordata Chondrichthyes
Lamprey Animalia Chordata Hyperoartia
Cetaceans Animalia Chordata Mammalia Artiodactyla
Bats Animalia Chordata Mammalia Chiroptera
Perissodactyla Animalia Chordata Mammalia Perissodactyla ORDER
Primates Animalia Chordata Mammalia Primates ORDER
Lemurs Animalia Chordata Mammalia Primates
Rodents Animalia Chordata Mammalia Rodentia
Mammals Animalia Chordata Mammalia
Terrestrial Mammals Animalia Chordata Mammalia
Marine Mammals Animalia Chordata Mammalia
Mammals Scattered Islands Animalia Chordata Mammalia
Terrestial Mammals Animalia Chordata Mammalia
Aquatic Mammals Animalia Chordata Mammalia
Metropolitan Mammals Animalia Chordata Mammalia
Endemic Mammals Animalia Chordata Mammalia
Large Mammals Animalia Chordata Mammalia
Insectivores Animalia Chordata Mammalia
Carnivores Animalia Chordata Mammalia
Marine Cetartiodactyla Animalia Chordata Mammalia
Terrestrial Cetartiodactyla Animalia Chordata Mammalia
Proboscidea & Sirenia Animalia Chordata Mammalia
Endangered Mammals Animalia Chordata Mammalia
Ungulates Animalia Chordata Mammalia
Land Mammals Animalia Chordata Mammalia
Caimans Animalia Chordata Reptilia Crocodilia
Chameleons Animalia Chordata Reptilia Squamata
Lizards and Worm-Lizards Animalia Chordata Reptilia Squamata
Snakes Animalia Chordata Reptilia Squamata
Marine Turtles Animalia Chordata Reptilia Testudines
Turtles Animalia Chordata Reptilia Testudines
Sea Turtles Animalia Chordata Reptilia Testudines
Reptiles Animalia Chordata Reptilia
Terrestrial Reptiles Animalia Chordata Reptilia
Endemic Lizards Animalia Chordata Reptilia
Endemic Reptiles Animalia Chordata Reptilia
Fishes Animalia Chordata
Lampreys Animalia Chordata
Freshwater Fishes Animalia Chordata
Marine Fishes Animalia Chordata
Tunicata Animalia Chordata
Lancelets Animalia Chordata
Reef Fishes Animalia Chordata
Terrestrial Vertebrates Animalia Chordata
Freshwater and Migratory Fishes Animalia Chordata
Cyclostomata Animalia Chordata
Endangered Vertebrates Animalia Chordata
Endemic Freshwater Fishes Animalia Chordata
Linefishes Animalia Chordata
Brackish and Freshwater Fishes Animalia Chordata
Corals Animalia Cnidaria
Cnidaria Animalia Cnidaria PHYLUM
Reef Corals Animalia Cnidaria
Marine Cnidaria Animalia Cnidaria
Echinoderms Animalia Echinodermata
Acorn Worms Animalia Hemichordata Enteropneusta
Bivalvia Animalia Mollusca Bivalvia CLASS
Marine Bivalves Animalia Mollusca Bivalvia
Mussels Animalia Mollusca Bivalvia
Musslels Animalia Mollusca Bivalvia
Cephalopods Animalia Mollusca Cephalopoda
Gastropoda Animalia Mollusca Gastropoda CLASS
Snails Animalia Mollusca Gastropoda
Marine Snails Animalia Mollusca Gastropoda
Terrestrial Gastropods Animalia Mollusca Gastropoda
Freshwater Gastropods Animalia Mollusca Gastropoda
Mollusca Animalia Mollusca PHYLUM
Molluscs Animalia Mollusca
Terrestrial Molluscs Animalia Mollusca
Non-Marine Molluscs Animalia Mollusca
Inland Molluscs Animalia Mollusca
Species-Poor Groups of Marine Molluscs Animalia Mollusca
Freswater Mollusc Animalia Mollusca
Extramarine Molluscs Animalia Mollusca
Nematoda Animalia Nematoda PHYLUM
Ribbon Worms Animalia Nemertea
Flatworms Animalia Platyhelminthes Turbellaria
Porifera Animalia Porifera PHYLUM
Sea Sponges Animalia Porifera
Vertebrates Animalia chordata
Fauna Animalia
Cave Fauna Animalia
Polychaeta Animalia KINGDOM
Invertebrates Animalia
Fauna_en_higher Animalia
Fauna_nt_lc_dd Animalia
Fauna_en_vu Animalia
Terrestrial Invertebrates Animalia
Aquatic Invertebrates Animalia
Fauna_flagship Species Animalia
Endemic Fauna Animalia
Protected Animals Animalia
Endangered Fauna Animalia
Selected Species Animalia
Marine Species Animalia
Endangered Species Animalia
Marine Invertebrates Animalia
Freshwater Plants Animalia
Flora Visiting Fauna Animalia
Other Invertebrates Animalia
Other Marine Invertebrates Animalia
Marine Fauna Animalia
Endemic Animals Animalia
Vaucheriaceae Chromista Ochrophyta Xanthophyceae Vaucheriales Vaucheriaceae FAMILY
Ascomycota Fungi Ascomycota PHYLUM
Agaricales Fungi Basidiomycota Agaricomycetes Agaricales ORDER
Boletaceae Fungi Basidiomycota Agaricomycetes Boletales Boletaceae FAMILY
Boletales Fungi Basidiomycota Agaricomycetes Boletales ORDER
Russulales Fungi Basidiomycota Agaricomycetes Russulales ORDER
Ustilaginales Fungi Basidiomycota Ustilaginomycetes Ustilaginales ORDER
Basidiomycota Fungi Basidiomycota PHYLUM
Fungi Fungi KINGDOM
Macromycetes Fungi
Mushrooms Fungi
Macrofungi Fungi
Phytoparasitic Small Fungi Fungi
Large Mushrooms Fungi
Lichenicolous Fungus Fungi
Ascomycetes Fungi
Aphyllophorales Fungi
Phytoparasitic Microfungi Fungi
Characeae Plantae Charophyta Charophyceae Charales Characeae FAMILY
Charophyceae Plantae Charophyta Charophyceae CLASS
Desmidiales Plantae Charophyta Conjugatophyceae Desmidiales ORDER
Zygnematophyceae Plantae Charophyta Zygnematophyceae CLASS
Freshwater Diatoms Plantae Gyrista Bacillariophyceae
Marchantiophyta Plantae Marchantiophyta PHYLUM
Apiaceae Plantae Tracheophyta Magnoliopsida Apiales Apiaceae FAMILY
Cactaceae Plantae Tracheophyta Magnoliopsida Caryophyllales Cactaceae FAMILY
Magnoliaceae Plantae Tracheophyta Magnoliopsida Magnoliales Magnoliaceae FAMILY
Anisoptera Plantae Tracheophyta Magnoliopsida Malvales Dipterocarpaceae GENUS
Dipterocarpaceae Plantae Tracheophyta Magnoliopsida Malvales Dipterocarpaceae FAMILY
Magnoliophyta Plantae Tracheophyta PHYLUM
Charophytes Plantae Charophyceae Charales
Lycopods Plantae Lycopodiopsida Lycopodiales
Tree Ferns Plantae Polypodiopsida
Sphagnum Mosses Plantae Sphagnopsida
Orchids Plantae Asparagales
Wild Cinnamon Plantae Laurales
Bromeliads Plantae Poales
Flora Plantae
Bryophytes Plantae
Ferns Plantae
Vascular Plants Plantae
Lichens Plantae
Hydrophytes Plantae
Marine Flora Plantae
Hornworts Plantae
Liverworts Plantae
Mosses Plantae
Flora Saint Paul and Amsterdam Plantae
Flora Scattered Islands Plantae
Flora Kerguelen Plantae
Endemic Flora Plantae
Trees Plantae
Shrubs Plantae
Algae Plantae
Arctic Vascular Plants Plantae
Marine Macroalgae Plantae
Freshwater Red Algae Plantae
Freshwater Brown Algae Plantae
Flowering Plant Plantae
Red Algae Plantae
Brown Algae Plantae
Hepaticophyta Plantae
Broad-Leaved Mosses Plantae
Lichen Communities Plantae
Flora of Cerrado Biom Plantae
Endemic Plants Plantae
Flora On the Red List Plantae
Protected Plants Plantae
Endangered Plants Plantae
Near-Endemic Flora Plantae
Perennial Shrubs Plantae
Flora_2 Plantae
Endemic and Range-Restricted Vascular Plantss Plantae
Indigenous Plants Plantae
Selected Species In Marshlands Plantae
Conifers Plantae
Peninsular Planrs Plantae
Lycophytes Plantae
Higher Plants Plantae
Cloud Forest Trees Plantae
Spermatophytes Plantae
Palms Plantae
Wild Crop Relatives Plantae
Aquatic Plants Plantae
Medicinal Plants Plantae
Dry Forest Trees Plantae
Monocotyledons Plantae
Freshwater Flora Plantae
Flora List Plantae
Endemic Trees Plantae
Myxomycetes Protozoa Mycetozoa Myxomycetes CLASS
Protozoa Protozoa KINGDOM
Zygoptera Protozoa GENUS

Check Event fields

The field is: year

  • Check it has numeric values.
# List one row per source whose `year` cannot be read as a number
# (either missing or not numeric text).
raw_metadata %>% 
  janitor::clean_names() %>% 
  janitor::remove_empty(c('rows', 'cols')) %>% 
  select(year, name_orig) %>% 
  mutate(year = ifelse(year == 'NA', NA, year),
         # non-numeric text becomes NA here (with a coercion warning)
         year_new = as.numeric(year)) %>% 
  filter(is.na(year_new)) %>% 
  distinct(name_orig, .keep_all = TRUE)

# Preview the year correction: parse `year` as a number and assign 2024 to the
# listed sources when their year is still missing afterwards.
raw_metadata %>% 
  janitor::clean_names() %>% 
  janitor::remove_empty(c('rows', 'cols')) %>% 
  mutate(year = ifelse(year == 'NA', NA, year),
         year = ifelse(year == '2024?', 2024, year),
         year = as.numeric(year)) %>% 
  mutate(year = case_when(
    is.na(year) & name_orig %in% c(
      'Crveni popis hrvatskih koralja',
      'A Red List of Benin’s sharks',
      'Crveni popis lišajeva Hrvatske',
      'Červené seznamy',
      'Coleoptera (Beetle) – Invertebrate Ireland Online',
      'Tricoptera (Caddisfly) – InvertebrateIreland Online',
      'Les mammifères de la Côte d’Ivoire',
      'Rongeurs et insectivores de Côte d’Ivoire, leur habitat et leur statut de conservation',
      'Красная книга Азербайджанской Республики',
      'Красная книга Узбекистана'
    ) ~ 2024,
    .default = year
  )) %>% 
  # filter(is.na(year)) %>% 
  select(year, name_orig) %>% 
  distinct(name_orig, .keep_all = TRUE)

DOUBTS

https://www.odonat-grandest.fr/listes-rouges-grand-est-etat-avancement/
A Red List of Benin’s sharks                                           
The Red List of Mammals of South Africa, Swaziland and Lesotho 2024  
1 Crveni popis hrvatskih koralja                                                        
2 Crveni popis lišajeva Hrvatske                                                        
3 Červené seznamy                                                                       
4 Coleoptera (Beetle) – Invertebrate Ireland Online                                     
5 Tricoptera (Caddisfly) – InvertebrateIreland Online                                   
6 Les mammifères de la Côte d’Ivoire                                                    
7 Rongeurs et insectivores de Côte d’Ivoire, leur habitat et leur statut de conservation
8 Красная книга Азербайджанской Республики                                              
9 Красная книга Узбекистана  

Check if URLs are working

# URL incorrect: distinct `url_clean` values that do not contain 'http'
# (grepl() returns FALSE for NA, so missing URLs are listed as well)
raw_metadata %>% 
  distinct(url_clean) %>% 
  filter(!grepl('http', url_clean))
# A tibble: 3 × 1
  url_clean                                                  
  <chr>                                                      
1 NA                                                         
2 ima.sc.gov.br/index.php/biodiversidade/biodiversidade/fauna
3 <NA>                                                       
# URL error
# raw_metadata %>% filter(grepl('http', url_clean)) %>% 
#   distinct(url_clean) %>% 
#   mutate(check_URL = ifelse(map(URLencode(url_clean), http_error), 'not found', 'OK')) %>% 
#   filter(check_URL == 'not found') 

DOUBTS

ima.sc.gov.br/index.php/biodiversidade/biodiversidade/fauna
Nicolau, J. i Dalmau, J., 2008. Llista Vermella\r\ndels Vertebrats d’Andorra. BIOCOM (Biologia i\r\nComunicació) SL i Departament de Patrimoni\r\nNatural del Govern d’Andorra. Informe inèdit

Run code and keep relevant fields

# Lower-case the particles that str_to_title() capitalises inside names.
# 'Of' and 'The' are matched as whole words only, so a name that merely starts
# with those letters (for instance a taxon such as 'Theridiidae') keeps its
# capital letter.
lowercase_particles <- function(x) {
  x %>% 
    str_replace_all('And ', 'and ') %>% 
    str_replace_all('\\bOf\\b', 'of') %>% 
    str_replace_all('\\bThe\\b', 'the')
}

# Build the cleaned `metadata` table: attach the taxonomy, tidy the source,
# location and event fields, and keep only the relevant columns.
# `merged_list` and `raw_metadata_taxon_list` are created outside this chunk.
metadata <- 
  # check taxon
  left_join(raw_metadata %>% mutate(group = str_trim(group)) %>%
              select(-c(kingdom,phylum,subphylum,class,order)),
            bind_rows(merged_list %>% filter(!is.na(scientificName)),
                      merged_list %>% filter(is.na(scientificName)) %>%
                        select(group) %>%
                        left_join(. , raw_metadata_taxon_list))) %>% 
  mutate(group = str_trim(group)) %>% 
  mutate(group = str_to_title(group)) %>% 
  mutate(group = lowercase_particles(group)) %>% 
  # more taxonomic corrections
  #
  #
  #
  #
  # check columns
  janitor::clean_names() %>% 
  janitor::remove_empty(c('rows', 'cols')) %>% 
  # check source
  mutate(name_orig = str_squish(name_orig)) %>% 
  mutate(name_orig = str_remove_all(name_orig, "\\\\")) %>% 
  mutate(name_orig = str_remove_all(name_orig, "\"")) %>% 
  # %in% (not ==) so that a missing state_province gives FALSE rather than NA,
  # which would otherwise turn a URL-like name_orig into NA
  mutate(name_orig = ifelse(grepl('htt', name_orig) & 
                               state_province %in% 'Grand Est',
                             str_glue('Red list of {group} of Grand Est'), name_orig)) %>% 
  # missing name of source
  
  # check format
  mutate(format = ifelse(format == 'NA', NA, str_squish(format))) %>% 
  mutate(format = ifelse(format == '?', NA, format)) %>% 
  # check language
  mutate(language = ifelse(language == 'NA', NA, str_squish(language))) %>% 
  # replace every separator (not only the first) and collapse the spacing,
  # as done for the other pipe-separated fields below
  mutate(language = str_squish(str_replace_all(language, "/|\\|", ' | '))) %>% 
  # check location
  mutate(continent = str_squish(str_replace_all(str_squish(continent), '\\|', ' | '))) %>%
  mutate(continent = str_replace_all(continent, '_', ' ')) %>%
  mutate(continent = str_to_title(continent)) %>% 
  mutate(country = ifelse(country == 'NA', NA, str_squish(country))) %>% 
  mutate(country = str_replace_all(country, '_', ' ')) %>%
  mutate(country = ifelse(country == 'USSR', country, str_to_title(country))) %>%
  mutate(country = lowercase_particles(country)) %>% 
  mutate(state_province = ifelse(state_province == 'NA', NA, str_squish(state_province))) %>%
  mutate(state_province = str_to_title(state_province)) %>% 
  # 'NA' is kept as a real ISO-2 code when the country is Namibia
  mutate(iso_2 = ifelse(iso_2 == 'NA' & country != 'Namibia', NA, str_squish(iso_2))) %>% 
  mutate(iso_3 = ifelse(iso_3 == 'NA', NA, str_squish(iso_3))) %>%  
  mutate(iso_2 = str_squish(str_replace_all(str_squish(iso_2), '\\|', ' | '))) %>%
  mutate(iso_3 = str_squish(str_replace_all(str_squish(iso_3), '\\|', ' | '))) %>%
  mutate(gadm_level_1 = ifelse(gadm_level_1 == 'NA', NA, str_squish(gadm_level_1))) %>%
  mutate(gadm_level_1 = str_to_title(gadm_level_1)) %>% 
  mutate(gadm_level_1 = lowercase_particles(gadm_level_1)) %>% 
  mutate(gadm_level_2 = ifelse(gadm_level_2 == 'NA', NA, str_squish(gadm_level_2))) %>%
  mutate(gadm_level_2 = str_to_title(gadm_level_2)) %>% 
  mutate(region_custom = ifelse(region_custom == 'NA', NA, str_squish(region_custom))) %>% 
  mutate(region_detail = ifelse(region_detail == 'NA', NA, str_squish(region_detail))) %>% 
  mutate(region_detail = str_squish(str_replace_all(str_squish(region_detail), '\\|', ' | '))) %>%
  # check event
  mutate(year = ifelse(year == 'NA', NA, year)) %>% 
  mutate(year = ifelse(year == '2024?', 2024, year)) %>% 
  mutate(year = as.numeric(year)) %>% 
  # sources that still have no year after parsing are assigned 2024
  mutate(year = case_when(
    is.na(year) & name_orig %in% c(
      'Crveni popis hrvatskih koralja',
      'A Red List of Benin’s sharks',
      'Crveni popis lišajeva Hrvatske',
      'Červené seznamy',
      'Coleoptera (Beetle) – Invertebrate Ireland Online',
      'Tricoptera (Caddisfly) – InvertebrateIreland Online',
      'Les mammifères de la Côte d’Ivoire',
      'Rongeurs et insectivores de Côte d’Ivoire, leur habitat et leur statut de conservation',
      'Красная книга Азербайджанской Республики',
      'Красная книга Узбекистана'
    ) ~ 2024,
    .default = year
  )) %>% 
  # select columns
  select(id, continent, 
         gadm_level_0 = country, gadm_level_1, gadm_level_2,
         region_custom, region_detail, iso_2, iso_3,
         taxa=group, kingdom, phylum, class, order, family, 
         url = url_clean, source_name= name_orig,
         language, year) 

# Show a random sample of 50 cleaned records, ordered by id
metadata %>% 
  slice_sample(n = 50) %>% 
  arrange(id) %>% 
  kableExtra::kbl(booktabs = TRUE) %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
id continent gadm_level_0 gadm_level_1 gadm_level_2 region_custom region_detail iso_2 iso_3 taxa kingdom phylum class order family url source_name language year
57 Europe Austria Oberösterreich AT AUT Grasshoppers Animalia Arthropoda Insecta Orthoptera https://www.zobodat.at/pdf/BNO_0014_0027-0037.pdf Die Heuschreckenfauna ausgewählter Halbtrockenrasen des Strudengaus und des angrenzenden unteren Mühlviertels (Oberösterreich) Deutsch 2005
171 Europe Romania RO ROU Characeae Plantae Charophyta Charophyceae Charales Characeae https://link.springer.com/article/10.1007/s10531-005-2008-5 Red Data List of Charophytes in the Balkans English 2006
243 Europe Slovenia SI SVN Protozoa Protozoa https://www.uradni-list.si/files/RS_-2002-082-04055-OB~P041-0000.PDF Rdeči seznam praživali (Protozoa) Slovenian 2002
250 Europe Italy IT ITA Flora Plantae https://arts.units.it/bitstream/11368/2964383/2/10.1080%4011263504.2020.1739165.pdf Red list of threatened vascular plants in Italy English 2020
260 Europe Italy IT ITA Flora Plantae https://www.iucn.it/pdf/Comitato_IUCN_Lista_Rossa_della_flora_italiana_policy_species.pdf Lista Rossa de la Flora Italiana 1. POLICY SPECIES e altre specie minacciate Italian 2013
272 Europe Switzerland CH CHE Mammals Animalia Chordata Mammalia https://www.bafu.admin.ch/bafu/de/home/themen/biodiversitaet/publikationen-studien/publikationen/rote-listen-saeugetiere.html Rote Liste der Säugetiere (ohne Fledermäuse). Gefährdete Arten der Schweiz. German 2022
326 Europe Malta MT MLT Flora Plantae https://era.org.mt/red-data-book-for-the-maltese-islands/ Red data book for the Maltese Islands English 1989
357 Africa Réunion RE REU Reef Fishes Animalia Chordata https://inpn.mnhn.fr/espece/listerouge/FR/Poissons_recifaux_Reunion_2022 Liste rouge des poissons de récif de la Réunion (France) French 2022
400 Africa French Southern Territories TF ATF Reptiles Animalia Chordata Reptilia https://inpn.mnhn.fr/espece/listerouge/FR/Reptiles_Eparses_TAAF_2015 Liste rouge des reptiles des îles Éparses (France) French 2015
420 Europe France Bretagne FR FRA Flora Plantae https://inpn.mnhn.fr/espece/programme/listes-rouges/RG/?region=INSEER53 Liste rouge de la Flore de Bretagne French 2015
441 Europe France Corse FR FRA Butterflies Animalia Arthropoda Insecta Lepidoptera https://inpn.mnhn.fr/docs/LR_FCE/LR_regionale/Corse/LRR_Rhopaloceres_Zygenes_Corse_2017.pdf Liste rouge des Papillons diurnes de Corse French 2017
449 Europe France Grand Est FR FRA Fishes Animalia Chordata https://www.odonat-grandest.fr/listes-rouges-grand-est-etat-avancement/ Red list of Fishes of Grand Est French 2024
486 Europe France Occitania FR FRA Orthoptera Animalia Arthropoda Insecta Orthoptera https://www.occitanie.developpement-durable.gouv.fr/listes-rouges-regionales-occitanie-a24640.html?lang=fr Liste rouge des Orthoptères d'Occitanie French 2022
519 Europe France Aquitaine FR FRA Mammals Animalia Chordata Mammalia https://www.nouvelle-aquitaine.developpement-durable.gouv.fr/les-listes-rouges-regionales-a9991.html?lang=fr Liste rouge des mammifères continentaux non volants d'Aquitaine French 2020
579 Europe France Nord-Pas-De-Calais FR FRA Reptiles Animalia Chordata Reptilia https://inpn.mnhn.fr/docs/LR_FCE/LR_regionale/Nord-Pas-de-Calais/LRR_Amphibiens_Reptiles_NPDC.pdf Liste rouge régionale – Nord – Pas-de-Calais - Les Amphibiens et Reptiles du Nord - Pas-de-Calais French 2015
610 Europe Bulgaria BG BGR Fauna_en_higher Animalia http://e-ecodb.bas.bg/rdb/en/vol2/texts.html Red Data Book of the Republic of Bulgaria. Vol. 2. Animals English 2011
655 Europe Hungary HU HUN Trees Plantae https://www.researchgate.net/publication/341616466_Voros_Lista_Magyarorszag_veszelyeztetett_fa-_es_cserjefajai_Red_List_Threatened_tree_and_shrub_species_of_Hungary_-_Soproni_Egyetem_Kiado_University_of_Sopron_Press_Sopron_2019_59_pp Vörös Lista. Magyarország veszélyeztetett fa- és cserjefajai Hungarian 2019
689 Europe United Kingdom GB GBR Larger Brachycera Animalia Arthropoda Insecta Diptera https://publications.naturalengland.org.uk/publication/5411344246374400?category=4707656804597760 A review of the status of larger Brachycera flies of Great Britain (NECR192) English 2017
801 Europe Sweden Gästrikland SE SWE Fauna Animalia https://artfakta.se/sok SLU Artdatabanken (2020). Rödlista 2020 Swedish 2020
928 Europe Germany DE DEU Odonata Animalia Arthropoda Insecta Odonata https://www.rote-liste-zentrum.de/en/Download-Vertebrates-1874.html Rote Liste und Gesamtartenliste der Libellen (Odonata) Deutschlands German 2021
999 Europe Germany Baden-Württemberg DE DEU Odonata Animalia Arthropoda Insecta Odonata https://www.lubw.baden-wuerttemberg.de/natur-und-landschaft/rote-listen Rote Liste der Libellen Baden-Württembergs und der Naturräume German 2005
1041 Europe Germany Bayern DE DEU Lamellicornia Animalia Arthropoda Insecta Coleoptera https://www.lfu.bayern.de/natur/rote_liste_tiere/2003/index.htm Rote Liste gefährdeter Blatthornkäfer (Coleoptera: Lamellicornia) Bayerns German 2003
1176 Europe Germany Brandenburg DE DEU Odonata Animalia Arthropoda Insecta Odonata https://lfu.brandenburg.de/lfu/de/ueber-uns/veroeffentlichungen/detail/~08-09-2017-zeitschrift-naturschutz-und-landschaftspflege-in-brandenburg-beilage-zu-heft-4-2017 Rote Liste der Libellen (Odonata) des Landes Brandenburg 2016 German 2016
1249 Europe Germany Bremen DE DEU Stoneflies Animalia Arthropoda Insecta Plecoptera https://www.nlwkn.niedersachsen.de/naturschutz/rote-liste-der-eintags-stein-und-kocherfliegenarten-niedersachsens-38879.html Rote Liste der Eintags-, Stein- und Köcherfliegenarten Niedersachsens German 2000
1256 Europe Germany Niedersachsen DE DEU Heteroptera Animalia Arthropoda Insecta Diptera Sphaeroceridae https://www.nlwkn.niedersachsen.de/naturschutz/rote-liste-der-in-niedersachsen-und-bremen-gefahrdeten-wanzen-38884.html Rote Liste der in Niedersachsen und Bremen gefährdeten Wanzen German 1999
1257 Europe Germany Bremen DE DEU Spiders Animalia Arthropoda Arachnida Araneae https://www.nlwkn.niedersachsen.de/naturschutz/rote-liste-der-in-niedersachsen-und-bremen-gefahrdeten-webspinnen-mit-gesamtartenverzeichnis-38796.html Rote Liste der in Niedersachsen und Bremen gefährdeten Webspinnen mit Gesamtartenverzeichnis German 2004
1378 Europe Germany Mecklenburg-Vorpommern DE DEU Snails Animalia Mollusca Gastropoda https://www.lung.mv-regierung.de/fachinformationen/natur-und-landschaft/artenschutz/rote-listen/ Rote Liste der gefährdeten Schnecken und Muscheln des Binnenlandes Mecklenburg-Vorpommerns German 2002
1418 Europe Germany Nordrhein-Westfalen DE DEU Red Algae Plantae https://www.lanuk.nrw.de/themen/natur/artenschutz/rote-liste Rote Liste und Artenverzeichnis der Rot- und Braunalgen - Rhodophyceae et Fucophyceae - in NRW, 1. Fassung German 2010
1457 Europe Germany Saarland DE DEU Bats Animalia Chordata Mammalia Chiroptera https://rote-liste-saarland.de/ Rote Liste und Gesamtartenliste der Fledermäuse (Chiroptera) des Saarlandes German 2020
1541 Europe Germany Sachsen DE DEU Spiders Animalia Arthropoda Arachnida Araneae https://publikationen.sachsen.de/bdb/artikel/39282 Rote Liste Weberknechte und Webspinnen German 1996
1608 Europe Germany Sachsen-Anhalt DE DEU Cerophytidae Animalia Arthropoda Insecta Coleoptera Cerophytidae https://lau.sachsen-anhalt.de/alt-vor-neuer-navigation/wir-ueber-uns-publikationen/fachpublikationen/berichte-des-lau/rote-listen-sachsen-anhalt-2040 Rote Listen Sachsen-Anhalt 2020 German 2020
1612 Europe Germany Sachsen-Anhalt DE DEU Latridiidae Animalia Arthropoda Insecta Coleoptera Latridiidae https://lau.sachsen-anhalt.de/alt-vor-neuer-navigation/wir-ueber-uns-publikationen/fachpublikationen/berichte-des-lau/rote-listen-sachsen-anhalt-2044 Rote Listen Sachsen-Anhalt 2020 German 2020
1625 Europe Germany Sachsen-Anhalt DE DEU Orsodacnidae Animalia Arthropoda Insecta Coleoptera Orsodacnidae https://lau.sachsen-anhalt.de/alt-vor-neuer-navigation/wir-ueber-uns-publikationen/fachpublikationen/berichte-des-lau/rote-listen-sachsen-anhalt-2047 Rote Listen Sachsen-Anhalt 2020 German 2020
1784 Europe Germany Thüringen DE DEU Leiodidae Animalia Arthropoda Insecta Coleoptera Leiodidae https://tlubn.thueringen.de/naturschutz/rote-listen/kaefer Rote Liste der Aaskäfer, Nestkäfer, Poch- und Diebskäfer, Scheinbockkäfer, Ölkäfer, Düsterkäfer, Schwarzkäfer (Insecta: Coleoptera: Silphidae, Leiodidae pt., Ptinidae, Oedemeridae, Meloidae, Melandryidae, Tenebrionidae) und weiterer ausgewählter Käferfamilien Thüringens 2020 German 2020
1785 Europe Germany Thüringen DE DEU Ptinidae Animalia Arthropoda Insecta Coleoptera Ptinidae https://tlubn.thueringen.de/naturschutz/rote-listen/kaefer Rote Liste der Aaskäfer, Nestkäfer, Poch- und Diebskäfer, Scheinbockkäfer, Ölkäfer, Düsterkäfer, Schwarzkäfer (Insecta: Coleoptera: Silphidae, Leiodidae pt., Ptinidae, Oedemeridae, Meloidae, Melandryidae, Tenebrionidae) und weiterer ausgewählter Käferfamilien Thüringens 2020 German 2020
1821 Europe Germany Thüringen DE DEU Zygaenidae Animalia Arthropoda Insecta Lepidoptera Zygaenidae https://tlubn.thueringen.de/naturschutz/rote-listen/schmetterlinge Rote Liste der Widderchen (Insecta: Lepidoptera: Zygaenidae) Thüringens 2010 German 2010
1898 Europe Liechtenstein LI LIE Reptiles Animalia Chordata Reptilia https://www.llv.li/serviceportal2/amtsstellen/amt-fuer-umwelt/publikationen/naturkindliche-forschung/b23-reptilien.pdf Die Reptilien des Fürstentums Liechtenstein German 2006
2102 Asia South Korea KR KOR Endangered Mammals Animalia Chordata Mammalia https://www.researchgate.net/publication/330162745_Mammals_of_Korea MAMMALS OF KOREA English 1997
2191 Asia Georgia GE GEO Fauna Animalia https://environment.cenn.org/ge/%E1%83%92%E1%83%90%E1%83%93%E1%83%9B%E1%83%9D%E1%83%AC%E1%83%94%E1%83%A0%E1%83%90/%E1%83%A1%E1%83%90%E1%83%A5%E1%83%90%E1%83%A0%E1%83%97%E1%83%95%E1%83%94%E1%83%9A%E1%83%9D%E1%83%A1-%E1%83%AC%E1%83%98%E1%83%97%E1%83%94%E1%83%9A%E1%83%98-%E1%83%AC%E1%83%98%E1%83%92%E1%83%9C%E1%83%98/ საქართველოს წითელი წიგნი Georgian 1982
2205 Asia Kyrgyzstan KG KGZ Flora Plantae http://oopt-back.at.kg/uploads/publication/Red_Book_KR.pdf KYRGYZ REPUBLIC RED DATA BOOK (SECOND EDITION) Kyrgyz 2006
2227 Asia Thailand TH THA Birds Animalia Chordata Aves https://patricklepetit.jalbum.net/_FAUNA%20OF%20THAILAND/LIBRARY/Birds%20of%20Thailand.pdf Thailand Red Data : Birds English 2005
2346 Europe Belgium Vlaanderen BE BEL Flora Plantae https://www.vlaanderen.be/inbo/rode-lijsten/ Vlaanderen Instituut voor Natuur- en Bosonderzoek - Rode lijsten Dutch 2024
2407 Europe Netherlands NL NLD Bryophytes Plantae https://www.blwg.nl/mossen/onderzoek/rapporten/BLWGRapport14.pdf Basisrapport voor de Rode Lijst Mossen 2012 Dutch 2012
2505 Europe Poland PL POL Apoidea Animalia Arthropoda Insecta Hymenoptera https://rcin.org.pl/iop/dlibra/publication/90278 Czerwona lista zwierząt ginących i zagrożonych w Polsce: Pszczoły Apoidea Polish 1992
2566 North America United States Virgin Islands VI VIR Flora Plantae https://dpnr.vi.gov/wp-content/uploads/2022/10/VI-WAP-Vol-2-Habitats-Species.pdf United States Virgin Islands Wildlife Action Plan English 2018
2647 South America Chile CL CHL Flora Plantae http://especies.mma.gob.cl/CNMWeb/Web/WebCiudadana/especies_amenazadas.aspx Especies Amenazadas de Chile Spanish 2024
2703 South America Peru PE PER Cactaceae Plantae Tracheophyta Magnoliopsida Caryophyllales Cactaceae http://www.scielo.org.pe/pdf/rpb/v13n2/v13n02a027.pdf Cactaceae endémicas del Perú Spanish 2006
2955 Asia Japan JP JPN Birds Animalia Chordata Aves https://ikilog.biodic.go.jp/Rdb/booklist レッドリスト2012 Japanese 2012
3046 Europe Russia Adygey RU RUS Flora Plantae https://www.plantarium.ru/lang/en/page/redbook/id/322.html Красная книга Республики Адыгея, 2021 г. Russian 2021
3105 Europe Russia Ivanovo RU RUS Fungi Fungi https://rusmam.ru/static/literature/2f/eb/2feb859ebc6b2ab99deed732f215a874.pdf Красная книга Ивановской области. Т. 1 Russian 2010

Data audit

Summary

Code
# One-column overview of the cleaned metadata.
# n_distinct() counts NA as a value unless na.rm = TRUE, so missing names
# (e.g. records with no sub-national territory) are excluded from the counts;
# na.rm = TRUE in sum() keeps a missing kingdom from turning a total into NA.
metadata %>% 
  summarise(`Number of records` = n(),
            `Number of sources` = n_distinct(source_name, na.rm = TRUE),
            `Number of taxa` = n_distinct(taxa, na.rm = TRUE),
            `Animalia records` = sum(kingdom == 'Animalia', na.rm = TRUE),
            `Plantae records` = sum(kingdom == 'Plantae', na.rm = TRUE),
            `Fungi records` = sum(kingdom == 'Fungi', na.rm = TRUE),
            Countries = n_distinct(gadm_level_0, na.rm = TRUE),
            `Sub-national territories` = n_distinct(gadm_level_1, na.rm = TRUE)) %>% 
  t() %>% `colnames<-`(c("N")) %>% 
  kableExtra::kbl(booktabs = TRUE) %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
N
Number of records 3200
Number of sources 2093
Number of taxa 487
Animalia records 2190
Plantae records 898
Fungi records 105
Countries 171
Sub-national territories 223

Geographic coverage

Code
# Country polygons (downloaded/cached under data/)
world <- geodata::world(resolution = 3, level = 0, path = 'data/')


# Number of distinct sources per country polygon.
# iso_3 holds multi-country records as 'AAA | BBB'; the separator pattern
# swallows the spaces around the pipe so each code matches GID_0 exactly.
world_records <- st_as_sf(world) %>% 
  left_join(metadata %>%
              separate_rows(iso_3, sep = '\\s*\\|\\s*') %>% 
              select(iso_2, GID_0 = iso_3, source_name),
            by = 'GID_0') %>% 
  group_by(GID_0, NAME_0) %>% 
  # countries without any record have only NA source names and count as 0
  summarise(n_records = n_distinct(source_name, na.rm = TRUE),
            iso_2_string = ifelse(n_records > 0,
                                  paste(iso_2, collapse = ';'), NA)) %>% 
  ungroup()

# Choropleth of the number of sources per country; countries with no source
# are set to NA so they are drawn in grey and labelled '0' in the legend.
plot_figure_1 <- 
  tm_shape(
    world_records %>% 
      select(-iso_2_string) %>% 
      mutate(n_records = replace(n_records, n_records == 0, NA))
  ) +
  tm_polygons(
    fill = 'n_records',
    fill_alpha = 0.9,
    col = 'grey40',
    col_alpha = 0.2,
    fill.scale = tm_scale_intervals(
      n = 6, 
      #style = 'jenks', 
      breaks = c(1,5,10,20,100,979),
      values = 'brewer.reds',
      value.na = 'grey80',
      label.na = '0'
    ),
    fill.legend = tm_legend(
      item.space = 0,
      item.na.space = 0,
      title = 'Number of sources', 
      reverse = TRUE, 
      # frame=F, 
      frame.lwd = 0.1,
      bg.color = 'white'
    )
  ) +
  tm_layout(legend.outside = TRUE, 
            legend.position = c('left','bottom'),
            frame = FALSE) +
  tm_crs(property = 'global')

# static version of the map
tmap_mode('plot')
plot_figure_1

Code
# Switch tmap to its 'view' mode and draw the same map object again
tmap_mode('view')
plot_figure_1

Taxonomic coverage

Code
# Five animal classes with the largest number of distinct sources
metadata %>% 
  filter(kingdom == 'Animalia', !is.na(class)) %>% 
  group_by(kingdom, class) %>% 
  summarise(`Number of sources` = n_distinct(source_name),
            .groups = 'drop_last') %>% 
  arrange(desc(`Number of sources`)) %>% 
  slice_head(n = 5) %>% 
  kableExtra::kbl(booktabs = TRUE) %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
kingdom class Number of sources
Animalia Insecta 568
Animalia Aves 170
Animalia Mammalia 158
Animalia Reptilia 133
Animalia Amphibia 121
Code
# Five plant orders with the largest number of distinct sources
metadata %>% 
  filter(kingdom == 'Plantae', !is.na(order)) %>% 
  group_by(kingdom, order) %>% 
  summarise(`Number of sources` = n_distinct(source_name),
            .groups = 'drop_last') %>% 
  arrange(desc(`Number of sources`)) %>% 
  slice_head(n = 5) %>% 
  kableExtra::kbl(booktabs = TRUE) %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
kingdom order Number of sources
Plantae Charales 18
Plantae Asparagales 5
Plantae Caryophyllales 2
Plantae Malvales 2
Plantae Apiales 1
Code
# fish sources: taxa whose name contains 'fish' (any case), crayfish excluded
metadata %>% 
  filter(grepl('fish', taxa, ignore.case = TRUE),
         !grepl('crayfish', taxa, ignore.case = TRUE)) %>% 
  summarise(`Number of fish sources` = n_distinct(source_name)) %>% 
  kableExtra::kbl(booktabs = TRUE) %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Number of fish sources
102
Code
# Bar chart: number of distinct sources per animal phylum, largest first
metadata %>% 
  filter(kingdom == 'Animalia', !is.na(phylum)) %>% 
  group_by(kingdom, phylum) %>% 
  summarise(n_sources_taxa = n_distinct(source_name),
            .groups = 'drop_last') %>% 
  ggplot(aes(x = reorder(phylum, -n_sources_taxa), y = n_sources_taxa)) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(~kingdom, scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Code
# Bar chart: number of distinct sources per phylum, one panel each for
# Plantae and Fungi
metadata %>% 
  filter(kingdom %in% c('Plantae', 'Fungi'), !is.na(phylum)) %>% 
  group_by(kingdom, phylum) %>% 
  summarise(n_sources_taxa = n_distinct(source_name),
            .groups = 'drop_last') %>% 
  ggplot(aes(x = reorder(phylum, -n_sources_taxa), y = n_sources_taxa)) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(~kingdom, scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Code
# Bar chart: number of distinct sources per animal order, largest first
metadata %>% 
  filter(kingdom == 'Animalia', !is.na(order)) %>% 
  group_by(kingdom, order) %>% 
  summarise(n_sources_taxa = n_distinct(source_name),
            .groups = 'drop_last') %>% 
  ggplot(aes(x = reorder(order, -n_sources_taxa), y = n_sources_taxa)) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(~kingdom, scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Code
# Bar chart: number of distinct sources per order, one panel each for
# Plantae and Fungi
metadata %>% 
  filter(kingdom %in% c('Plantae', 'Fungi'), !is.na(order)) %>% 
  group_by(kingdom, order) %>% 
  summarise(n_sources_taxa = n_distinct(source_name),
            .groups = 'drop_last') %>% 
  ggplot(aes(x = reorder(order, -n_sources_taxa), y = n_sources_taxa)) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(~kingdom, scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Temporal coverage

Code
# Bar chart: number of distinct sources published per year.
metadata %>% 
  group_by(year) %>% 
  summarise(publications_year = n_distinct(source_name)) %>% 
  ggplot(aes(x = year, y = publications_year)) +
  geom_col(fill = "#4CAF50") +
  ylim(c(0, 150)) +
  scale_x_continuous(n.breaks = 15) +
  # The 1975-2025 window is set on the coordinate system: an xlim() placed
  # before scale_x_continuous() is replaced by that scale and has no effect.
  # Zooming this way also keeps the bars that sit on the window edges.
  coord_cartesian(xlim = c(1975, 2025)) +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean()